capture log close

*****************************************************************************
* Do-file that creates propensity scores and inverse probability weights    *
* from NEPS-SC4 data.                                                       *
*         "A replication of Ochsenfeld (2016), Patzina & Toussaint"         *
*****************************************************************************

* The -capture log close- at the very top closes a log that may still be open
* from an earlier run; -capture- suppresses the error when no log is open.
* Below: pin the interpreter version, empty memory, disable output paging.
version 13
clear
set more off

* Definition of the global path for importing NEPS-SC4 datasets
global data "Z:\SUF\Remote\SC4\SC4_R_10-0-0\Stata14"

* Definition of the global path for reading in do-files
* NOTE(review): this global is not referenced in the visible part of the script.
global file "Y:\"
 
* Definition of the global path for saving datasets and log files.
* The value ends in a backslash, and later file names are appended with another
* backslash, so the resulting paths contain a doubled separator.
global project "Z:\Projects\p000165_DUA_3627\Gender Segregation\"

* Work inside the project directory so that the log file is written there.
cd "${project}"
log using PT_pscore_ipw, replace


*** Build the person-level working file step by step from several sources
	* pTarget, wave 3: load only the needed variables and only wave-3 rows,
	* then store them as the first version of the working file.
	use ID_t wave t66210d t66210h t66210c t44613a t436300 if wave==3 ///
		using "${data}\SC4_pTarget_R_10-0-0.dta", clear
	save "${project}\data_ind_w7.dta", replace

	* pTarget, wave 7: the t66207 items (RIASEC information).
	* Wave-7 rows are the master data; rows found only in the wave-3 file are
	* discarded, i.e. cases without RIASEC information in wave 7 are dropped.
	use ID_t wave t66207* if wave==7 ///
		using "${data}\SC4_pTarget_R_10-0-0.dta", clear
	merge 1:1 ID_t using "${project}\data_ind_w7.dta", keep(master match) nogenerate
	save "${project}\data_ind_w7.dta", replace
	
	* pTarget, wave 1: migration background
	use ID_t wave t400500_g1v1 if wave==1 ///
		using "${data}\SC4_pTarget_R_10-0-0.dta", clear
	* mig is 1 when t400500_g1v1 takes one of the codes 1 to 6, otherwise 0
	gen mig = inlist(t400500_g1v1, 1, 2, 3, 4, 5, 6)
	label variable mig "Migrationshintergrund"
	label define mig_l 0 "kein Mig." 1 "Mig."
	label values mig mig_l
	tabulate mig
	* Keep every case of the working file; wave-1 rows that are not in the
	* working file (no RIASEC information in wave 7) are discarded.
	merge 1:1 ID_t using "${project}\data_ind_w7.dta", keep(match using) nogenerate
	* Cases of the working file without a wave-1 row have no mig value: code 9
	replace mig = 9 if mig == .
	save "${project}\data_ind_w7.dta", replace
	
	* pTarget, all waves: socio-economic status from t731320 and t731370
	use "${data}\SC4_pTarget_R_10-0-0.dta", clear
	keep ID_t wave t731320 t731370
	* Person-level maximum of each variable over all rows of the person
	* (relies on the data already being sorted by ID_t)
	by ID_t: egen ses_mutter = max(t731320)
	by ID_t: egen ses_vater = max(t731370)
	* Reduce to one row per person: number the rows and keep the first one
	by ID_t: gen spell = _n
	keep if spell == 1

	* high SES: mother or father have at least university (of applied sciences) degree
	* Codes 4 and 5 become 1, codes 0 1 2 3 6 become 0, the listed negative
	* codes and code 7 become missing.
	recode ses_mutter (4 5 =1)(0 1 2 3 6 =0)(-54 -90 -92 -95 -98 7 =.), gen(ses_m)
	recode ses_vater (4 5 =1)(0 1 2 3 6 =0)(-54 -90 -92 -95 -98 7 =.), gen(ses_v)
	* ses: 1 if either parent indicator is 1, missing if both are missing, else 0
	gen ses = cond(ses_m == 1 | ses_v == 1, 1, cond(ses_m == . & ses_v == ., ., 0))

	* Only cases present in both this file and the working file are kept
	merge 1:1 ID_t using "${project}\data_ind_w7.dta", keep(match) nogenerate
	save "${project}\data_ind_w7.dta", replace
	
	* CohortProfile, wave 1: tx80501 (recoded into the gender dummy later on)
	* and tx8050y; only cases found in both files are kept.
	use ID_t wave tx80501 tx8050y if wave==1 ///
		using "${data}\SC4_CohortProfile_R_10-0-0.dta", clear
	merge 1:1 ID_t using "${project}\data_ind_w7.dta", keep(match) nogenerate
	save "${project}\data_ind_w7.dta", replace

	* spVocTrain: student indicator and information of the first row per person
	use "${data}\SC4_spVocTrain_R_10-0-0.dta", clear
	keep ID_t wave spell ts15404_g1R ts15405_g1R ts15411
	tabulate ts15404_g1R
	* code -54 counts as missing
	replace ts15404_g1R = . if ts15404_g1R == -54
	* Row-level flag: 1 when ts15404_g1R is filled, 0 when it is missing
	gen student_2 = (ts15404_g1R != .)
	tabulate student_2
	* Person-level flag: 1 as soon as any row of the person is flagged
	* (relies on the data already being sorted by ID_t)
	by ID_t: egen student = max(student_2)
	label variable student "Student vs. alle Nicht-Studenten"
	label define student_l 0 "kein Student" 1 "Student"
	label values student student_l
	* keep only information of first study program: first row per person
	by ID_t: keep if _n == 1
	drop spell
	* Keep every case of the working file; spVocTrain rows that are not in
	* the working file (no RIASEC information in wave 7) are discarded.
	merge 1:1 ID_t using "${project}\data_ind_w7.dta", keep(match using) nogenerate
	save "${project}\data_ind_w7.dta", replace
	
	* pTarget (grades): German and math grades, one row per person.
	* The dataset is loaded with -clear- like every other -use- in this file.
	* Without it the command only works while the data in memory is unchanged
	* since the last -save- and otherwise stops with error r(4).
	use "${data}\SC4_pTarget_R_10-0-0.dta", clear
	keep ID_t wave t724111 t724112 t724101 t724102 t724601 t724602
	* Drop rows where both sources of the German grade (t724111, t724101) are
	* missing or carry one of the listed negative codes; same for the math
	* grade (t724112, t724102).
	drop if (t724111==. | t724111 ==-90 | t724111== -95) & (t724101 ==. | t724101 ==-98 | t724101 ==-95 | t724101 == -90)
	drop if (t724112==. | t724112 ==-90 | t724112==-95) & (t724102 ==. | t724102 ==-98 | t724102 ==-95 | t724102 == -90)
	* Remaining negative codes become missing
	recode t724111 (-90 -95 -54 -20=.)
	recode t724112 (-90 -95 -54 -20=.)
	recode t724101 (-90 -95 -98 -54 -20 -99=.)
	recode t724102 (-90 -95 -98 -54 -20 -99=.)
	* deut: t724111 if available, otherwise t724101
	gen deut =t724111 if t724111 ~=.
	replace deut = t724101 if deut==.
	* math: t724112 if available, otherwise t724102
	gen math = t724112 if t724112~=.
	replace math = t724102 if math ==.
	recode t724601 (-54 -90 -95 =.)
	recode t724602 (-54 -90 -95 =.)
	drop if deut ==. & t724601==. /*drop spells without information on German grade*/
	drop if math ==. & t724602==. /*drop spells without information on Math grade*/
	* Keep the first remaining row per person
	* (relies on the data already being sorted by ID_t)
	by ID_t: gen minspell =_n
	keep if minspell ==1
	drop minspell
	* Keep every case of the working file, also those without grade rows;
	* grade rows that are not in the working file are discarded.
	merge 1:1 ID_t using "${project}\data_ind_w7.dta"
	keep if (_merge ==3 | _merge==2) /*drop cases without RIASEC information in wave 7*/
	drop _merge
	save "${project}\data_ind_w7.dta", replace
	
	* spSchool (fullabi): school spells with ts11209==5 and tf11211==2
	use "${data}\SC4_spSchool_R_10-0-0.dta", clear
	keep ID_t wave spell tf11211 ts11209
	* keep only respondents with standard university entrance qualification (allg. HR)
	keep if tf11211==2 & ts11209==5
	* one spell per individual: first row per person
	* (relies on the data already being sorted by ID_t)
	by ID_t: keep if _n == 1
	drop spell
	* Every case of the working file is kept at this point. Cases without a
	* qualifying school spell stay in the data with ts11209 missing.
	merge 1:1 ID_t using "${project}\data_ind_w7.dta", keep(match using) nogenerate
	save "${project}\data_ind_w7.dta", replace
	
	* Weights (for wave 1): attach w_t1 and keep only cases found in both
	* files. The result stays in memory and is not written back to disk.
	use ID_t w_t1 using "${data}\SC4_Weights_R_10-0-0.dta", clear
	merge 1:1 ID_t using "${project}\data_ind_w7.dta", keep(match) nogenerate
	* Put the identifiers first
	order ID_t wave

* gender: frau is 0 for tx80501==1 and 1 for tx80501==2; the codes -97, -54
* and -55 become missing and those cases are removed
recode tx80501 (1=0)(2=1)(-97 -54 -55=.), gen(frau)
keep if frau != .


* major: copies of ts15404_g1R / ts15405_g1R with negative codes set to
* missing; students with no information in either copy are removed
recode ts15404_g1R (-98 -96 -54=.), gen(B1ber1ab1)
recode ts15405_g1R (-96 -54=.), gen(B1ber2ab1)
drop if student==1 & B1ber1ab1==. & B1ber2ab1==.

* remove cases without a value in ts11209
drop if ts11209==.

* math grades vs german grades
* convert grades into points (17 minus three times the grade); a result of
* -1 is set to 0
foreach subject in math deut {
	gen `subject'_punkte = 17 - 3*`subject'
	replace `subject'_punkte = 0 if `subject'_punkte == -1
}
* difference of math points and German points; cases without it are removed
gen dif_math_deut_pkt = math_punkte - deut_punkte
tabulate dif_math_deut_pkt
label variable dif_math_deut_pkt "relative math grade"
drop if dif_math_deut_pkt==.


* RIASEC: one score per item t66207a_g1 ... t66207f_g1, computed as
* (item - 1)/4 after the codes -55 and -54 have been set to missing
local riasec_names doer thinker creator helper persuader organizer
local position = 0
foreach item in a b c d e f {
	local ++position
	local dimension : word `position' of `riasec_names'
	replace t66207`item'_g1 = . if inlist(t66207`item'_g1, -55, -54)
	gen int_`dimension' = (t66207`item'_g1 - 1)/4
}
* remove every case with a missing value on any of the six scores
foreach dimension of local riasec_names {
	drop if int_`dimension' == .
}

* job values
* prefix the value labels of de789 with their numeric codes
numlabel de789, add
* codes -90 and -95 count as missing on all three items
recode t66210d t66210h t66210c (-90 -95=.)
* flexi_index: flexible working hours are important
gen flexi_index = (t66210d - 1)/5
* bread_index: good remuneration is important
gen bread_index = (t66210h - 1)/5

* t436300: reverse the codes 1-4 into 3-0; codes -90 and -98 become missing
recode t436300 (4=0)(3=1)(2=2)(1=3)(-90 -98=.)
* remove cases without a value on either index
drop if bread_index==. | flexi_index==.

* Remove cases without a student indicator
drop if student ==.

* "No information" category (code 2, labelled "keine Angabe") for mig and ses.
* Missing mig values were already set to 9 when the migration indicator was
* built, so recoding only "." would leave that category under the unlabelled
* code 9. Both "." and 9 are therefore mapped to 2. The models below use i.mig,
* so only the level code changes, not the fit.
recode mig (. 9 = 2)
label define x 2 "keine Angabe"
recode ses (.=2)
lab val mig x
lab val ses x

tab student frau

* An index value of -11 results from an item code of -54, since (-54 - 1)/5
* equals -11; these cases are set to 0.
* NOTE(review): confirm that treating code -54 as index 0 is intended.
recode bread_index (-11=0)
recode flexi_index (-11=0)

* Propensity score: logit of the student indicator on gender, migration
* background, parental education, the six RIASEC scores, the two job-value
* indices and the math and German grade points.
logit student frau i.mig i.ses int_doer int_thinker int_creator int_helper int_persuader int_organizer bread_index flexi_index math_punkte deut_punkte
* predict without options returns the predicted probability after logit
predict pscore
margins, predict(xb)
* y_hat_m: sample mean of the predicted probability, the same for every case
egen y_hat_m = mean(pscore)
sum y_hat_m
* Stabilized inverse probability weights: marginal probability of the observed
* status divided by its conditional probability.
*   students:     y_hat_m / pscore
*   non-students: (1 - y_hat_m) / (1 - pscore)
* The denominator for non-students is the probability of NOT being a student.
* Dividing by pscore there would give the largest weights to the non-students
* who look least like students.
gen ipt = y_hat_m/pscore if student==1 
replace ipt = (1-y_hat_m)/(1-pscore) if student==0
sum ipt, d 

* Gender distribution within four bands of the propensity score.
* NOTE(review): the cut-offs are hard-coded and look like quartiles copied
* from an earlier run; they need updating whenever the data or model change.
sum pscore if student==0, d

* bands among non-students
local c1 0.6477078
local c2 0.7233981
local c3 0.7859899
tab frau if student==0 & pscore<`c1'
tab frau if student==0 & pscore>=`c1' & pscore<`c2'
tab frau if student==0 & pscore>=`c2' & pscore<`c3'
tab frau if student==0 & pscore>=`c3'

* bands over all cases
local a1 0.6847986
local a2 0.7596181
local a3 0.8207738
tab frau if pscore<`a1'
tab frau if pscore>=`a1' & pscore<`a2'
tab frau if pscore>=`a2' & pscore<`a3'
tab frau if pscore>=`a3'

* Inverse probability weights (not stabilized)
* Same covariates as the propensity-score model, fitted again so that the
* prediction below refers to this estimation.
logit student frau i.ses i.mig int_doer int_thinker int_creator int_helper int_persuader int_organizer bread_index flexi_index math_punkte deut_punkte
predict p_student, pr
* w: 1/p for students, 1/(1-p) for non-students, missing for any other value
gen w = cond(student==1, 1/p_student, 1/(1-p_student)) if inlist(student, 0, 1)
sum w

* Keep the identifier, the student indicator and the score/weight variables
keep ID_t ipt y_hat_m pscore w student

save "${project}\data_pscore.dta", replace

*** END ***
log close
exit, clear


